In [91]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
In [167]:
import os

file_path = r"/content/spotify.csv"
if os.path.exists(file_path):
    print("File found")
else:
    print("File NOT found.")
File found!
In [39]:
file_path = "/content/Wiki-Vote.txt"  # change this to your actual file path

edges = []  # will store tuples of the form (fromNode, toNode)
node = []
with open(file_path, "r", encoding="utf-8") as f:
    for line in f:
        # Remove leading/trailing whitespace
        line = line.strip()

        # Skip empty lines or lines starting with '#'
        if not line or line.startswith('#'):
            continue

        # Split into parts by whitespace
        parts = line.split()

        # If this line is supposed to hold two values, parse them
        if len(parts) == 2:
            from_node, to_node = map(int, parts)
            edges.append((from_node, to_node))

print("Number of edges:", len(edges))
print("First 5 edges:", edges[:5])
Number of edges: 103689
First 5 edges: [(30, 1412), (30, 3352), (30, 5254), (30, 5543), (30, 7478)]
In [43]:
wiki_vote_path = "/content/Wiki-Vote.txt"
G_wiki = nx.read_edgelist(wiki_vote_path, create_using=nx.DiGraph(), comments="#", nodetype=int)

# Calculate number of nodes and edges
num_nodes = G_wiki.number_of_nodes()
num_edges = G_wiki.number_of_edges()

print(f"Number of nodes  {num_nodes},")
print(f"Number of edges  {num_edges},")
Number of nodes  7115,
Number of edges  103689,
In [44]:
# Create the directed graph for Wiki-Vote data
G = G_wiki


pos = nx.spring_layout(G, seed=42)

# Set figure size and title
size = 20
title = "Wikipedia Votes Network (Adminship Voting)"

fig, ax = plt.subplots(figsize=(size, size - 4))

# Draw nodes
options = {"edgecolors": "tab:gray", "node_size": 25, "alpha": 0.75}
nx.draw_networkx_nodes(G, pos, node_color=["#76c8c8"], **options)

# Draw edges (directed)
nx.draw_networkx_edges(G, pos, edge_color="gray", width=0.5, alpha=0.3, arrows=True, arrowsize=6)

# Plot settings
plt.tight_layout()
plt.title(title, fontsize=20)
plt.axis("off")
plt.show()
No description has been provided for this image

Network Analysis Degree Distribution Analysis

In [45]:
def degree_dist(G,title):
  # Degree Distribution:
  degrees = [deg for _, deg in G.degree()]
  plt.hist(degrees, bins=15, edgecolor='black')
  plt.xlabel("Degree")
  plt.ylabel("Frequency")
  plt.title(title)
  plt.show()
degree_dist(G,"Degree Distribution of Real Graph")
No description has been provided for this image
In [49]:
def concomp(G):
    num_components = nx.number_weakly_connected_components(G)
    largest_cc = max(nx.weakly_connected_components(G), key=len)
    largest_subgraph = G.subgraph(largest_cc).copy()
    print(f"Number of weakly connected components: {num_components}")
    print(f"Largest component size: {len(largest_cc)}")
    return largest_subgraph
largest_subgraph = concomp(G)
Number of weakly connected components: 24
Largest component size: 7066
In [56]:
UG = largest_subgraph.to_undirected()


if nx.is_connected(UG):
    avg_path_len = nx.average_shortest_path_length(UG)
    diameter = nx.diameter(UG)

    print(f"Average shortest path length: {avg_path_len}")
    print(f"Diameter: {diameter}")
else:
    print("Subgraph is not connected. Cannot compute path-based metrics.")
Average shortest path length: 3.247509990226615
Diameter: 7
In [156]:
average_clustering = nx.average_clustering(G)
density = nx.density(G)
print(f"Average clustering coefficient : {average_clustering}")
print(f"Network Density: {density}")
Average clustering coefficient : 0.5085087124404498
Network Density: 0.001981599433828733

Centrality analysis

Degree Centrality

In [66]:
def degree_centrality(G):
   degree_centrality=nx.degree_centrality(G)
   most_active =sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)[:10]
   print("Top 10 most active nodes by degree centrality :")
   for node, centrality in most_active:
       print(f"node {node}, Centrality: {centrality:.10f}")
degree_centrality(G)
Top 10 most active nodes by degree centrality :
node 2565, Centrality: 0.1507430998
node 766, Centrality: 0.1094125973
node 11, Centrality: 0.1051663128
node 1549, Centrality: 0.1047416844
node 457, Centrality: 0.1036093418
node 1166, Centrality: 0.0973814579
node 2688, Centrality: 0.0874734607
node 1374, Centrality: 0.0754423213
node 1151, Centrality: 0.0731776362
node 5524, Centrality: 0.0700636943

Betweenness Centrality

In [67]:
def between_centrality(G):
   betweenness_centrality=nx.betweenness_centrality(G)
   top_betweenness =sorted(betweenness_centrality.items(), key=lambda x: x[1], reverse=True)[:10]
   print("Top 10 most active nodes by degree centrality :")
   for node, centrality in top_betweenness:
       print(f"node {node}, Centrality: {centrality:.10f}")
between_centrality(G)
Top 10 most active nodes by degree centrality :
node 2565, Centrality: 0.0621102429
node 11, Centrality: 0.0361871579
node 457, Centrality: 0.0359790207
node 4037, Centrality: 0.0289607164
node 1549, Centrality: 0.0264972305
node 766, Centrality: 0.0257051716
node 1166, Centrality: 0.0248066639
node 15, Centrality: 0.0203233473
node 1374, Centrality: 0.0193799230
node 2237, Centrality: 0.0152684815

Closeness

In [69]:
def closeness_centrality(G):
   closeness_centrality=nx.closeness_centrality(G)
   top_closeness =sorted(closeness_centrality.items(), key=lambda x: x[1], reverse=True)[:10]
   print("Top 10 most active nodes by degree centrality :")
   for node, centrality in  top_closeness:
       print(f"node {node}, Centrality: {centrality:.10f}")
closeness_centrality(G)
Top 10 most active nodes by degree centrality :
node 2565, Centrality: 0.4907954151
node 766, Centrality: 0.4701537233
node 457, Centrality: 0.4698410587
node 1549, Centrality: 0.4690923577
node 1166, Centrality: 0.4689055552
node 1374, Centrality: 0.4532623340
node 11, Centrality: 0.4519286126
node 1151, Centrality: 0.4471235998
node 2688, Centrality: 0.4448992443
node 2485, Centrality: 0.4375154818

Eigenvector centrality

In [70]:
def eigen_centrality(G):
   eigenvector_centrality=nx.eigenvector_centrality(G)
   top_eigenvector =sorted(eigenvector_centrality.items(), key=lambda x: x[1], reverse=True)[:10]
   print("Top 10 most active nodes by degree centrality :")
   for node, centrality in top_eigenvector:
       print(f"node {node}, Centrality: {centrality:.10f}")
eigen_centrality(G)
Top 10 most active nodes by degree centrality :
node 2565, Centrality: 0.1576876546
node 766, Centrality: 0.1301506260
node 1549, Centrality: 0.1293987753
node 1166, Centrality: 0.1195107407
node 2688, Centrality: 0.1100709104
node 457, Centrality: 0.1100008669
node 3352, Centrality: 0.0917856212
node 11, Centrality: 0.0895923462
node 1151, Centrality: 0.0871942492
node 1374, Centrality: 0.0869397259

Page rank

In [71]:
def pagerank_centrality(G):
   pagerank=nx.pagerank(G)
   top_pagerank =sorted(pagerank.items(), key=lambda x: x[1], reverse=True)[:10]
   print("Top 10 most active nodes by degree centrality :")
   for node, centrality in top_pagerank:
       print(f"node {node}, Centrality: {centrality:.10f}")
pagerank_centrality(G)
Top 10 most active nodes by degree centrality :
node 2565, Centrality: 0.0043588671
node 11, Centrality: 0.0030333278
node 766, Centrality: 0.0029809543
node 457, Centrality: 0.0029772160
node 4037, Centrality: 0.0029051737
node 1549, Centrality: 0.0028716081
node 1166, Centrality: 0.0026808432
node 2688, Centrality: 0.0023925463
node 15, Centrality: 0.0021808684
node 1374, Centrality: 0.0021427121

Removing random node

In [83]:
def rem_nodes(G, num_remove, title):
    G_copy = G.copy()
    G_copy.remove_nodes_from(num_remove)

    # Get components based on graph type
    if G.is_directed():
        components = sorted(nx.weakly_connected_components(G_copy), key=len, reverse=True)
    else:
        components = sorted(nx.connected_components(G_copy), key=len, reverse=True)

    color_palette = plt.get_cmap("tab20").colors
    fig, ax = plt.subplots(figsize=(18, 14))

    for i, component in enumerate(components[:10]):  # Top 10 largest components
        subgraph = G_copy.subgraph(component)
        pos = nx.spring_layout(subgraph, k=0.1, seed=42)
        node_color = color_palette[i % len(color_palette)]

        # Draw nodes and edges
        nx.draw_networkx_nodes(subgraph, pos, node_color=node_color,
                               edgecolors="tab:gray", node_size=25, alpha=0.75, ax=ax)
        nx.draw_networkx_edges(subgraph, pos, width=1.0, alpha=0.5, ax=ax)

    ax.set_title(title, fontsize=16)
    ax.axis("off")
    plt.show()

# Example usage
remove = np.random.choice(list(G.nodes()), size=100, replace=False).tolist()
rem_nodes(G, remove, "Effect of Removing Random Nodes")
/usr/local/lib/python3.11/dist-packages/networkx/drawing/nx_pylab.py:457: UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.
  node_collection = ax.scatter(
No description has been provided for this image

removing high centrality node

In [85]:
def rem_nodes(G):
    pagerank = nx.pagerank(G)
    top = sorted(pagerank.items(), key=lambda x: x[1], reverse=True)[:100]
    G_copy = G.copy()

    nodes_remove = []
    for node, x in top:
      nodes_remove.append(node)
    G_copy.remove_nodes_from(nodes_remove)

    components = list(nx.connected_components(G_copy))
    largest_component = None
    if components:
        largest_component = max(components, key=len)
    component_colors = {}
    for i, component in enumerate(components):
        component_colors[tuple(component)] = f"C{i}"

    size = 20
    fig, ax = plt.subplots(figsize=(size, size-4))

    title = "Effect of removing nodes with highest page rank centrality"

    if not components:
        return "No connected components"
    for i, component in enumerate(components):
        subgraph = G_copy.subgraph(component)
        pos = nx.spring_layout(subgraph, k=0.1, seed=42)
        options = {"edgecolors": "tab:gray", "node_size": 25, "alpha": 0.75}
        nx.draw_networkx_nodes(subgraph, pos, node_color=component_colors[tuple(component)], **options)
        nx.draw_networkx_edges(subgraph, pos, width=1.0, alpha=0.5)
    ax.set_title(title)
    plt.show()

rem_nodes(G)
No description has been provided for this image

Comparison with Random Graphs

Erdos Renyi

In [87]:
n = G.number_of_nodes()
m = G.number_of_edges()
p = m / (n * (n - 1))
k = max(1, int(m / n))


ER = nx.gnp_random_graph(n, p, directed=True)
BA = nx.barabasi_albert_graph(n, k)
WS = nx.watts_strogatz_graph(n, k, 0.1)


def get_largest_component_subgraph(G):
    if G.is_directed():
        component = max(nx.weakly_connected_components(G), key=len)
    else:
        component = max(nx.connected_components(G), key=len)
    return G.subgraph(component).copy()

# Extract largest components
wiki_sub = get_largest_component_subgraph(G)
ER_sub = get_largest_component_subgraph(ER)
BA_sub = get_largest_component_subgraph(BA)
WS_sub = get_largest_component_subgraph(WS)

# Plotting
fig, axs = plt.subplots(2, 2, figsize=(14, 12))

axs[0, 0].set_title("Wiki-Vote")
nx.draw(wiki_sub, ax=axs[0, 0], node_size=20, edge_color='gray', with_labels=False)

axs[0, 1].set_title("Erdős–Rényi")
nx.draw(ER_sub, ax=axs[0, 1], node_size=20, edge_color='gray', with_labels=False)

axs[1, 0].set_title("Barabási–Albert")
nx.draw(BA_sub, ax=axs[1, 0], node_size=20, edge_color='gray', with_labels=False)

axs[1, 1].set_title("Watts–Strogatz")
nx.draw(WS_sub, ax=axs[1, 1], node_size=20, edge_color='gray', with_labels=False)

plt.suptitle("Comparison of Real and Synthetic Network Models", fontsize=16)
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()
No description has been provided for this image

average degree for all grapg

In [163]:
def compute_average_degree(graph):

    degrees = dict(graph.degree())

    average = sum(degrees.values()) / len(degrees)

    top_node = max(degrees, key=degrees.get)

    top_value = degrees[top_node]

    return round(average, 2), top_node, highest_value

def average_degree_report():
    graph_dict = {
        "Erdős–Rényi": ER,
        "Barabási–Albert": BA,
        "Watts–Strogatz": WS
    }

    results = []

    for name, graph in graph_dict.items():
        avg_degree, top_node, Highest_value = compute_average_degree(graph)
        results.append({
            "Graph": name,
            "Average Degree": avg_degree,
            "Top Degree Node": f"{top_node} ({highest_value})"
        })

    df = pd.DataFrame(results).set_index("Graph")
    print(df)

# Run the report
average_degree_report()
                 Average Degree           Top Degree Node
Graph                                                    
Erdős–Rényi               28.49  493 (0.7575757575757576)
Barabási–Albert           27.94   19 (0.7575757575757576)
Watts–Strogatz            14.00  371 (0.7575757575757576)
In [98]:
def longest_shortest_path(G):
    if G.is_directed():
        G = G.to_undirected()

    if not nx.is_connected(G):
        largest_cc = max(nx.connected_components(G), key=len)
        G = G.subgraph(largest_cc).copy()

    diameter = nx.diameter(G)
    return diameter

W= []
for name, G in graphs.items():
    try:
        d = longest_shortest_path(G)
        W.append({
            "Graph": name,
            "Diameter (Longest Shortest Path)": d
        })
    except Exception as e:
        W.append({
            "Graph": name,
            "Diameter (Longest Shortest Path)": f"Error: {str(e)}"
        })

# Display as DataFrame
df_diameter = pd.DataFrame(W).set_index("Graph")
print(df_diameter)
                 Diameter (Longest Shortest Path)
Graph                                            
Erdős–Rényi                                     4
Barabási–Albert                                 4
Watts–Strogatz                                  7

Longest shortes path with diameter

In [120]:
import pandas as pd
import networkx as nx

def longest_shortest_path(G):
    # Convert directed to undirected for proper path calculations
    if G.is_directed():
        G = G.to_undirected()

    # If the graph is connected
    if nx.is_connected(G):
        average_path = nx.average_shortest_path_length(G)
        diameter = nx.diameter(G)
    else:

        largest_cc = max(nx.connected_components(G), key=len)

        G = G.subgraph(largest_cc).copy()

        average_path = nx.average_shortest_path_length(G)

        diameter = nx.diameter(G)

    return average_path, diameter


W = []
for name, G in graphs.items():

    avg_path, diam = longest_shortest_path(G)

    W.append({
        "Graph": name,
        "Average Shortest Path": round(avg_path, 2),
        "Diameter": diam
    })

df_longest_shortest_path = pd.DataFrame(W).set_index("Graph")
print(df_longest_shortest_path)
                 Average Shortest Path  Diameter
Graph                                           
Erdős–Rényi                       2.93         4
Barabási–Albert                   2.82         4
Watts–Strogatz                    4.92         7

Clustering For all Graphs

In [122]:
for name, G in graphs.items():
  print(name, nx.average_clustering(G))
Erdős–Rényi 0.001944590224399168
Barabási–Albert 0.018214373216336132
Watts–Strogatz 0.5085087124404498

Degree Centrality

In [161]:
W={}
for name, G in graphs.items():
  top10 = sorted(nx.degree_centrality(G).items(), key = lambda x: x[1], reverse=True)[:10]
  W[name] = [f"{score:.4f} ({node})" for node, score in top10]

df = pd.DataFrame(W, index=["Top 1", "Top 2", "Top 3", "Top 4", "Top 5","Top6","Top7","Top8","Top9","Top10"]).T
print(df)
                        Top 1          Top 2          Top 3          Top 4  \
Erdős–Rényi      0.0072 (493)  0.0071 (1404)  0.0071 (1564)  0.0069 (5123)   
Barabási–Albert   0.0786 (19)    0.0713 (16)    0.0703 (15)    0.0701 (18)   
Watts–Strogatz   0.0027 (371)  0.0027 (1316)   0.0025 (190)   0.0025 (750)   

                         Top 5           Top6           Top7           Top8  \
Erdős–Rényi      0.0068 (6364)  0.0067 (3709)  0.0067 (6032)   0.0065 (344)   
Barabási–Albert    0.0644 (17)    0.0599 (21)    0.0559 (20)    0.0545 (35)   
Watts–Strogatz   0.0025 (1629)  0.0025 (2519)  0.0025 (3083)  0.0025 (3246)   

                          Top9          Top10  
Erdős–Rényi       0.0065 (419)  0.0065 (5646)  
Barabási–Albert    0.0527 (24)    0.0485 (27)  
Watts–Strogatz   0.0025 (3570)  0.0025 (3714)  

Closeness Centrality

In [164]:
def top_closeness_nodes(graph, top_n=10):

    closeness = nx.closeness_centrality(graph)
    top_nodes = sorted(closeness.items(), key=lambda x: x[1], reverse=True)[:top_n]
    return [f"{score:.4f} ({node})" for node, score in top_nodes]

def closeness_centrality_report(graphs_dict):
    top_n = 10
    closeness_data = {}

    for name, graph in graphs_dict.items():
        closeness_data[name] = top_closeness_nodes(graph, top_n)

    lable = [f"Top {i+1}" for i in range(top_n)]
    df = pd.DataFrame(closeness_data, index=lable).T
    print(df)

# Run the report
closeness_centrality_report(graphs)
                         Top 1          Top 2          Top 3          Top 4  \
Erdős–Rényi      0.3013 (4845)   0.3005 (319)  0.2989 (5863)   0.2988 (493)   
Barabási–Albert    0.5120 (19)    0.5090 (15)    0.5089 (16)    0.5063 (18)   
Watts–Strogatz   0.2258 (6568)  0.2256 (1405)  0.2251 (3659)  0.2238 (1629)   

                         Top 5          Top 6          Top 7          Top 8  \
Erdős–Rényi      0.2985 (1404)   0.2976 (959)  0.2972 (1564)  0.2970 (5511)   
Barabási–Albert    0.5044 (17)    0.4975 (20)    0.4973 (21)     0.4904 (0)   
Watts–Strogatz    0.2230 (829)  0.2228 (3714)  0.2221 (3710)  0.2221 (6534)   

                         Top 9         Top 10  
Erdős–Rényi      0.2968 (5046)  0.2968 (2864)  
Barabási–Albert    0.4902 (27)    0.4891 (35)  
Watts–Strogatz   0.2213 (3127)  0.2210 (6905)  
In [134]:
W={}
for name, G in graphs.items():
  top10 = sorted(nx.closeness_centrality(G).items(), key = lambda x: x[1], reverse=True)[:10]
  W[name] = [f"{score:.4f} ({node})" for node, score in top10]

df = pd.DataFrame(W, index=["Top 1", "Top 2", "Top 3", "Top 4", "Top 5","Top6","Top7","Top8","Top9","Top10"]).T
print(df)
                         Top 1          Top 2          Top 3          Top 4  \
Erdős–Rényi      0.3013 (4845)   0.3005 (319)  0.2989 (5863)   0.2988 (493)   
Barabási–Albert    0.5120 (19)    0.5090 (15)    0.5089 (16)    0.5063 (18)   
Watts–Strogatz   0.2258 (6568)  0.2256 (1405)  0.2251 (3659)  0.2238 (1629)   

                         Top 5           Top6           Top7           Top8  \
Erdős–Rényi      0.2985 (1404)   0.2976 (959)  0.2972 (1564)  0.2970 (5511)   
Barabási–Albert    0.5044 (17)    0.4975 (20)    0.4973 (21)     0.4904 (0)   
Watts–Strogatz    0.2230 (829)  0.2228 (3714)  0.2221 (3710)  0.2221 (6534)   

                          Top9          Top10  
Erdős–Rényi      0.2968 (5046)  0.2968 (2864)  
Barabási–Albert    0.4902 (27)    0.4891 (35)  
Watts–Strogatz   0.2213 (3127)  0.2210 (6905)  

Betweenness Centrality

In [137]:
r={}
for name, G in graphs.items():
  top10 = sorted(nx.betweenness_centrality(G).items(), key = lambda x: x[1], reverse=True)[:10]
  r[name] = [f"{score:.4f} ({node})" for node, score in top10]

df = pd.DataFrame(r, index=["Top 1", "Top 2", "Top 3", "Top 4", "Top 5","Top6","Top7","Top8","Top9","Top10"]).T
print(df)
                         Top 1          Top 2          Top 3          Top 4  \
Erdős–Rényi       0.0011 (493)  0.0011 (1564)  0.0010 (5123)  0.0010 (6364)   
Barabási–Albert    0.0358 (19)    0.0302 (16)    0.0302 (18)    0.0300 (15)   
Watts–Strogatz   0.0031 (1629)  0.0031 (3714)  0.0029 (3710)  0.0028 (1405)   

                         Top 5           Top6           Top7           Top8  \
Erdős–Rényi      0.0010 (6581)   0.0009 (419)  0.0009 (1404)    0.0009 (71)   
Barabási–Albert    0.0259 (17)    0.0227 (21)    0.0213 (20)    0.0185 (35)   
Watts–Strogatz   0.0026 (3246)  0.0026 (3243)  0.0026 (3659)  0.0025 (3908)   

                          Top9          Top10  
Erdős–Rényi      0.0009 (1134)  0.0009 (6500)  
Barabási–Albert    0.0178 (24)     0.0160 (0)  
Watts–Strogatz    0.0025 (190)  0.0025 (1956)  

Eigenvector Centrality

In [138]:
r={}
for name, G in graphs.items():
  top10 = sorted(nx.eigenvector_centrality(G).items(), key = lambda x: x[1], reverse=True)[:10]
  r[name] = [f"{score:.4f} ({node})" for node, score in top10]

df = pd.DataFrame(r, index=["Top 1", "Top 2", "Top 3", "Top 4", "Top 5","Top6","Top7","Top8","Top9","Top10"]).T
print(df)
                         Top 1          Top 2          Top 3          Top 4  \
Erdős–Rényi      0.0250 (4845)   0.0249 (319)   0.0247 (493)  0.0245 (5863)   
Barabási–Albert    0.1661 (19)    0.1590 (15)    0.1552 (16)    0.1477 (18)   
Watts–Strogatz   0.0207 (2712)  0.0203 (1293)  0.0198 (3083)  0.0198 (2103)   

                         Top 5           Top6           Top7           Top8  \
Erdős–Rényi      0.0236 (1404)   0.0231 (959)  0.0229 (1564)  0.0228 (5046)   
Barabási–Albert    0.1403 (17)    0.1235 (20)    0.1234 (21)     0.1117 (0)   
Watts–Strogatz    0.0198 (750)  0.0198 (2714)  0.0196 (1290)  0.0194 (2710)   

                          Top9          Top10  
Erdős–Rényi      0.0226 (2864)  0.0226 (5511)  
Barabási–Albert    0.1099 (27)    0.1048 (25)  
Watts–Strogatz   0.0194 (2715)  0.0193 (1297)  

Page Rank

In [142]:
r={}
for name, G in graphs.items():
  top10 = sorted(nx.pagerank(G).items(), key = lambda x: x[1], reverse=True)[:10]
  r[name] = [f"{score:.4f} ({node})" for node, score in top10]

df = pd.DataFrame(r, index=["Top 1", "Top 2", "Top 3", "Top 4", "Top 5","Top6","Top7","Top8","Top9","Top10"]).T
print(df)
                         Top 1         Top 2          Top 3          Top 4  \
Erdős–Rényi      0.0003 (4845)  0.0003 (319)   0.0003 (493)  0.0003 (5046)   
Barabási–Albert    0.0024 (19)   0.0022 (16)    0.0022 (18)    0.0021 (15)   
Watts–Strogatz   0.0002 (1316)  0.0002 (371)  0.0002 (3246)   0.0002 (190)   

                         Top 5           Top6           Top7           Top8  \
Erdős–Rényi      0.0003 (5863)  0.0003 (6500)   0.0003 (424)  0.0003 (2864)   
Barabási–Albert    0.0020 (17)    0.0018 (21)    0.0017 (20)    0.0017 (35)   
Watts–Strogatz   0.0002 (3908)  0.0002 (2519)  0.0002 (6339)  0.0002 (3714)   

                          Top9          Top10  
Erdős–Rényi      0.0003 (5190)  0.0003 (5870)  
Barabási–Albert    0.0016 (24)    0.0015 (27)  
Watts–Strogatz   0.0002 (3570)  0.0002 (5551)  

Comparing with Real Network - The Watts - strogatz showes highest clustering coefficient, but other graph like Erdos and Barabasi graph shows low clusterin. when we talk about degree distribution the Wiki Vote is highly skewed, while WS degree distrivution is narrow and regular `but good for real world distribution.

Removing hight centrality node In WS

In [146]:
import networkx as nx
import matplotlib.pyplot as plt

# Step 1: Get clustering coefficients
clustering = nx.clustering(WS)
highest_node = max(clustering, key=clustering.get)
highest_value = clustering[highest_node]

print(f"Removing node {highest_node} with clustering coefficient: {highest_value:.4f}")

# Step 2: Draw original graph with highest clustering node highlighted
pos = nx.spring_layout(WS, seed=42)

plt.figure(figsize=(12, 5))

# Original graph
plt.subplot(1, 2, 1)
plt.title("Original WS Graph (Highest Cluster Node Highlighted)")
node_colors = ['red' if n == highest_node else 'lightgray' for n in WS.nodes()]
node_sizes = [200 if n == highest_node else 50 for n in WS.nodes()]
nx.draw(WS, pos, node_color=node_colors, node_size=node_sizes, with_labels=False, edge_color='gray')

# Step 3: Remove node and draw updated graph
WS_modified = WS.copy()
WS_modified.remove_node(highest_node)

plt.subplot(1, 2, 2)
plt.title("WS Graph After Removing Highest Clustering Node")
nx.draw(WS_modified, pos, node_color='skyblue', node_size=50, with_labels=False, edge_color='gray')

plt.tight_layout()
plt.show()
Removing node 1248 with clustering coefficient: 0.7576
No description has been provided for this image

Community Experiment

In [147]:
partition =nx.community.louvain_communities(G, resolution=1, threshold=1e-07, max_level=None, seed=None)
num_communities = len(partition)
print(f"Number of communities detected (Louvain): {num_communities}")

colors = []

for node in G.nodes():

    colors.append(node)

fig, ax = plt.subplots(figsize=(size, size-4))

#for node
options = {"edgecolors": "tab:gray", "node_size": 15, "alpha": 0.75}
nx.draw_networkx_nodes(G, pos, node_color=colors,cmap=plt.cm.viridis, **options)

# for degree
nx.draw_networkx_edges(G, pos, width=0.6, alpha=0.3)
plt.title("Louvain Community Detection")
plt.show()
Number of communities detected (Louvain): 47
No description has been provided for this image

Linear Threshold Model

1st finding the most influential Nodes

In [152]:
def linear_threshold(G, nodes):
    # Assign random thresholds
    thresholds = {}
    for node in G.nodes:
        thresholds[node] = np.random.uniform(0, 1)

    # Assign random edge weights
    weights = {}
    for edge in G.edges:
        weights[edge] = np.random.uniform(0, 1)

    # Initialize active nodes
    active_nodes = set(nodes)

    # Iterative activation process
    newly_activated = True
    while newly_activated:
        newly_activated = False

        # Nodes that are not yet active
        inactive_nodes = set(G.nodes) - active_nodes

        for node in inactive_nodes:
            total_weight = 0

            for neighbor in G.neighbors(node):
                if neighbor in active_nodes:
                    total_weight += weights.get((neighbor, node), 0)
                    total_weight += weights.get((node, neighbor), 0)

            if total_weight >= thresholds[node]:
                active_nodes.add(node)
                newly_activated = True

    return active_nodes


# Select initial seed nodes  1 randomly
seed_nodes = np.random.choice(G.nodes, size=100, replace=False)

# Run the model
final_active_nodes = linear_threshold_model(G, seed_nodes)

def show_threshold(final_active_nodes):
    # Assign colors based on activation
    node_colors = []
    for node in G.nodes:
        if node in final_active_nodes:
            node_colors.append("red")
        else:
            node_colors.append("blue")

    # Draw the graph
    fig, ax = plt.subplots(figsize=(size, size-4))
    pos = nx.spring_layout(G, seed=42)
    # nodes
    options = {"edgecolors": "tab:orange", "node_size": 15}
    nx.draw_networkx_nodes(G, pos, node_color=node_colors,cmap=plt.cm.viridis, **options)

    # edges
    nx.draw_networkx_edges(G, pos, width=0.6, alpha=0.3)
    plt.title("Linear Threshold Model")
    plt.show()
show_threshold(final_active_nodes)
No description has been provided for this image
In [166]:
def initialize_thresholds_and_weights(graph, threshold_range=(0, 1), weight_range=(0, 1)):
    """Assign random thresholds and edge weights for each node."""
    thresholds = {}
    weights = {}

    for node in graph.nodes():
        thresholds[node] = np.random.uniform(*threshold_range)
        weights[node] = {
            neighbor: np.random.uniform(*weight_range)
            for neighbor in graph.neighbors(node)
        }

    return thresholds, weights

def linear_threshold_diffusion(graph, seeds, thresholds, weights):
    """Spread influence using Linear Threshold Model."""
    active_nodes = set(seeds)
    newly_active = set(seeds)

    while newly_active:
        next_active = set()
        for node in graph.nodes():
            if node not in active_nodes:
                influence = sum(
                    weights[node].get(neighbor, 0)
                    for neighbor in graph.neighbors(node)
                    if neighbor in active_nodes
                )
                if influence >= thresholds[node]:
                    next_active.add(node)
        newly_active = next_active
        active_nodes.update(newly_active)

    return active_nodes

def plot_activation(graph, active_nodes, title="Linear Threshold Model"):
    """Visualize active and inactive nodes."""
    pos = nx.spring_layout(graph, seed=42)
    fig, ax = plt.subplots(figsize=(10, 6))
    nx.draw_networkx_edges(graph, pos, width=0.6, alpha=0.3)
    nx.draw_networkx_nodes(graph, pos,
                           nodelist=active_nodes,
                           node_color='red',
                           node_size=10,
                           label='Active')
    nx.draw_networkx_nodes(graph, pos,
                           nodelist=set(graph.nodes()) - active_nodes,
                           node_color='blue',
                           node_size=10,
                           label='Inactive')
    plt.title(title)
    plt.legend()
    plt.axis("off")
    plt.show()

# === Main Flow ===

# Step 1: Assign thresholds and weights
thresholds, weights = initialize_thresholds_and_weights(G)

# Step 2: Choose top PageRank nodes as seeds
page_dict = nx.pagerank(G)
top_nodes = sorted(page_dict, key=page_dict.get, reverse=True)[:10]

# Step 3: Run the model
final_active_nodes = linear_threshold_diffusion(G, top_nodes, thresholds, weights)

# Step 4: Visualize result
plot_activation(G, final_active_nodes, title="LTM with Top PageRank Nodes")
No description has been provided for this image

The diameter based Linear threshold model effectly demonstrate a influence through network based to compared to random activation and high degree selection.By sending top PAGE RANK NODES IN the Wiki-votes and activates large portionof the network.

Reference

J. Leskovec, D. Huttenlocher, J. Kleinberg. Signed Networks in Social Media. CHI 2010. J. Leskovec, D. Huttenlocher, J. Kleinberg. Predicting Positive and Negative Links in Online Social Networks. WWW 2010. Anon, (n.d.).

‌